* ---------------------------------------------------------------------------
* Session setup: reset Stata, define the input/output folders, load the
* de-identified data, and declare the covariate lists reused below.
* ---------------------------------------------------------------------------
clear all
set more off
set maxvar 30000

* Both paths are left empty here; fill them in before running.
global datadir ""
global outdir ""

use "$datadir/de-identified.dta", clear

* Control variables used in the balance checks, summary tables and regressions.
global hh_covariates rural father_livehome adult_number child_number ///
    mother_job mother_primary_care father_edu mother_edu mother_age fa_asset

global child_covariates baby_gender baby_age premature




* Table A1: summary statistics for the complete sample (sample_complete==1)
* versus the incomplete sample (sample_complete==0).
* Panel A (household covariates) goes to A1_A.txt, panel B (child covariates)
* goes to A1_B.txt. Every covariate produces two tab-separated rows: the first
* holds the two means, the second the two standard deviations, with the
* complete sample listed before the incomplete sample.
foreach panel in A B {
    if "`panel'" == "A" local panel_vars $hh_covariates
    else                local panel_vars $child_covariates

    file open A1_`panel' using "$outdir/A1_`panel'.txt", write text replace
    file write A1_`panel' "varname" _tab _n

    foreach X of local panel_vars {
        * grp = 1 is the complete sample, grp = 0 the incomplete sample
        foreach grp in 1 0 {
            summ `X' if sample_complete == `grp'
            * format to three decimals and strip the padding added by the format
            local mean_`grp' = trim("`: display %-8.3f `r(mean)''")
            local sd_`grp'   = trim("`: display %-8.3f `r(sd)''")
        }

        file write A1_`panel' "`X'" _tab "`mean_1'" _tab "`mean_0'" _tab _n
        file write A1_`panel' "`X'" _tab "`sd_1'" _tab "`sd_0'" _tab _n
    }

    file close A1_`panel'
}
	

* P-value of the complete-vs-incomplete difference for every covariate.
* Each covariate is regressed on sample_complete, controlling for rural and
* day-of-week dummies, with robust standard errors. The p-value on
* sample_complete (row 4, column 1 of r(table)) is attached to the stored
* estimates as the scalar pvalue and exported next to the F statistic.
* Household covariates are written first (replace), child covariates are
* appended to the same A1_C.csv file.

* Start from an empty set of stored estimates so a re-run of this section
* cannot pick up models left over from an earlier run.
est clear
foreach X of global hh_covariates {
    * rural is itself one of the household covariates. Keeping it on the
    * right-hand side when it is also the dependent variable would regress
    * the variable on itself and make the reported p-value meaningless, so
    * the dependent variable is removed from the control list.
    local own `X'
    local controls rural
    local controls : list controls - own

    quietly reg `X' sample_complete `controls' i.day_of_week, robust
    matrix define T = r(table)
    estadd scalar pvalue = T[4,1]
    eststo
}
esttab using "$outdir/A1_C.csv", replace b(%10.4f) se scalars(F pvalue)

est clear
foreach X of global child_covariates {
    quietly reg `X' sample_complete rural i.day_of_week, robust
    matrix define T = r(table)
    estadd scalar pvalue = T[4,1]
    eststo
}
esttab using "$outdir/A1_C.csv", append b(%10.4f) se scalars(F pvalue)

* From here on only the complete sample is analysed.
keep if sample_complete == 1

* LENA measures: average the two recordings of each measure, once for the raw
* counts and once for the processed counts, then take natural logs.
* The raw count variables use the stubs AWC_COUNT, CT_COUNT and CV_COUNT.
local measures  AWC CTC CVC
local raw_stubs AWC_COUNT CT_COUNT CV_COUNT

forvalues i = 1/3 {
    local m    : word `i' of `measures'
    local stub : word `i' of `raw_stubs'
    gen `m'_RAW_average = (`stub'1 + `stub'2) / 2
}

foreach m of local measures {
    gen `m'_PROCESSED_average = (`m'_processed_day1 + `m'_processed_day2) / 2
}

global raw_outcomes AWC_RAW_average CTC_RAW_average CVC_RAW_average
global processed_outcomes AWC_PROCESSED_average CTC_PROCESSED_average CVC_PROCESSED_average

* Log of every averaged outcome; the new variables carry the prefix ln.
foreach var in $raw_outcomes $processed_outcomes {
    gen ln`var' = ln(`var')
}

* Parental knowledge: score each of the 30 items as correct (1) or not (0)
* and add the item scores up.
*   items_low  : responses 1 or 2 are scored 1, responses 3, 4, 5 are scored 0
*   items_high : responses 4 or 5 are scored 1, responses 1, 2, 3 are scored 0
* Values outside 1-5 (including missing) are left as they are by recode.
local items_low  2 4 5 7 9 10 13 14 16 17 19 22 24 25 27 28 30
local items_high 1 3 6 8 11 12 15 18 20 21 23 26 29

* names of the scored items, collected in the order they are created
local scored_items

foreach num of local items_low {
    local item parental_knowledge_`num'_correct
    gen `item' = parental_knowledge_`num'
    recode `item' (1 2 = 1) (3 4 5 = 0)
    local scored_items `scored_items' `item'
}

foreach num of local items_high {
    local item parental_knowledge_`num'_correct
    gen `item' = parental_knowledge_`num'
    recode `item' (4 5 = 1) (1 2 3 = 0)
    local scored_items `scored_items' `item'
}

* rowtotal counts a missing item score as zero
egen sum_parental_knowledge = rowtotal(`scored_items')


* Self-efficacy and social support: for each four-item scale build
*   (a) a raw sum score named after the scale, and
*   (b) a latent score predicted from a partial credit IRT model,
*       stored with the suffix _irt.
foreach scale in self_efficacy social_support {
    local items `scale'_1 `scale'_2 `scale'_3 `scale'_4

    egen `scale' = rowtotal(`items')
    irt pcm `items'
    predict `scale'_irt, latent
}

* Parental belief
* Working copies of the two belief items, taken before item 2 is reverse
* coded further down.
gen belief_1 = parental_belief_1
gen belief_2 = parental_belief_2

rename credi_z_LANG credi_lang

* Map each response category 1..5 onto the 5th, 25th, 50th, 75th and 95th
* percentile of credi_lang in the current sample.
foreach k in 1 2 {
    gen belief_`k'_langp5p95 = belief_`k'
    sum credi_lang, detail
    recode belief_`k'_langp5p95 (1 = `r(p5)') (2 = `r(p25)') (3 = `r(p50)') (4 = `r(p75)') (5 = `r(p95)')
}

* Difference between the two converted belief items.
gen belief_lang_p5p95 = belief_1_langp5p95 - belief_2_langp5p95

* Raw belief score: reverse code item 2 in place (1<->5, 2<->4, 3 unchanged),
* then add the two items; rowtotal counts a missing item as zero.
* NOTE(review): the in-place recode also changes what later tabulations of
* parental_belief_2 show; if the variable carries value labels they now
* describe the reversed codes. Confirm this is intended.
recode parental_belief_2 (1 = 5) (2 = 4) (4 = 2) (5 = 1)

egen belief_overall_rawtotal = rowtotal(parental_belief_1 parental_belief_2)


* Table 1: summary statistics for the analysis sample.
* Panel A (household covariates) goes to T1_A.txt, panel B (child covariates)
* goes to T1_B.txt. Every covariate produces two tab-separated rows: the mean
* on the first row and the standard deviation on the second.
foreach panel in A B {
    if "`panel'" == "A" local panel_vars $hh_covariates
    else                local panel_vars $child_covariates

    file open T1_`panel' using "$outdir/T1_`panel'.txt", write text replace
    file write T1_`panel' "varname" _tab _n

    foreach X of local panel_vars {
        summ `X'

        * three decimals, padding from the display format trimmed away
        local avg    = trim("`: display %-8.3f `r(mean)''")
        local spread = trim("`: display %-8.3f `r(sd)''")

        file write T1_`panel' "`X'" _tab "`avg'" _tab _n
        file write T1_`panel' "`X'" _tab "`spread'" _tab _n
    }

    file close T1_`panel'
}
	
* Table 2: LENA measure summary statistics.
* One tab-separated row per processed LENA average, written to T2.txt:
* variable name, mean, standard deviation, then the 5th, 25th, 50th, 75th
* and 95th percentiles, all formatted to three decimals.
file open T2 using "$outdir/T2.txt", ///
		write text replace
	* The header names every column written below. The mean and sd labels were
	* missing before, which shifted the percentile labels two columns to the left.
	file write T2 "varname" _tab "mean" _tab "sd" _tab "5th" _tab "25th" _tab "50th" _tab "75th" _tab "95th" _tab _n

	foreach X of varlist AWC_PROCESSED_average CTC_PROCESSED_average CVC_PROCESSED_average {

	* detail is needed for the percentile results
	summ `X',detail

	local mean  	: display %-8.3f `r(mean)'
	local SD 	 	: display %-8.3f `r(sd)'
	local 5th    	: display %-9.3f `r(p5)'
	local 25th  	: display %-9.3f `r(p25)'
	local 50th  	: display %-9.3f `r(p50)'
	local 75th  	: display %-9.3f `r(p75)'
	local 95th  	: display %-9.3f `r(p95)'

	* strip the padding added by the left-aligned display formats
	foreach M in mean SD 5th 25th 50th 75th 95th{
		local `M'_str = trim("``M''")
	}

	file write T2 "`X'" _tab "`mean_str'" _tab "`SD_str'" _tab "`5th_str'" _tab "`25th_str'" _tab  "`50th_str'" _tab "`75th_str'" _tab  "`95th_str'" _tab _n
	}
	file close T2


* Figures 1-3: kernel density of each processed LENA average, exported as PDF.
* Each scatteri layer joins two points with the same x value, which draws a
* vertical reference line; the legend labels those lines. The x positions and
* line heights are hard-coded numbers, not computed from the data in memory.

* Figure 1: adult word count, reference lines at 14025 and 21098.
twoway ///
    (kdensity AWC_PROCESSED_average) ///
    (scatteri 0 14025 0.00008 14025, recast(line)) ///
    (scatteri 0 21098 0.00008 21098, recast(line)), ///
    xtitle(Adult Word Count (AWC)) ///
    legend(label(2 "Sample mean") ///
           label(3 "Urban sample mean (Zhang et al, (2015))") ///
           order(2 3) col(1)) ///
    scheme(sj) note("Source: authors' survey")
graph export "$outdir/Fig1.pdf", replace

* Figure 2: conversational turn count, reference lines at 565 and 751.
twoway ///
    (kdensity CTC_PROCESSED_average) ///
    (scatteri 0 565 0.0015 565, recast(line)) ///
    (scatteri 0 751 0.0015 751, recast(line)), ///
    xtitle(Conversational Turn Count (CTC)) ///
    legend(label(2 "Sample mean") ///
           label(3 "Urban sample mean (Zhang et al, (2015))") ///
           order(2 3) col(1)) ///
    scheme(sj) note("Source: authors' survey")
graph export "$outdir/Fig2.pdf", replace

* Figure 3: child vocalization count, single reference line at 1932.
twoway ///
    (kdensity CVC_PROCESSED_average) ///
    (scatteri 0 1932 0.0006 1932, recast(line)), ///
    xtitle(Child Vocalization Count (CVC)) ///
    legend(label(2 "Sample mean") order(2) col(1)) ///
    scheme(sj) note("Source: authors' survey")
graph export "$outdir/Fig3.pdf", replace


* Parental measures: mean and standard deviation of the four summary scores,
* written to T3.txt as two tab-separated rows per variable (mean row first,
* standard deviation row second).
local parental_scores sum_parental_knowledge self_efficacy social_support belief_overall_rawtotal

file open T3 using "$outdir/T3.txt", write text replace
file write T3 "varname" _tab _n

foreach X of varlist `parental_scores' {
    summ `X'

    * three decimals, padding from the display format trimmed away
    local avg    = trim("`: display %-8.3f `r(mean)''")
    local spread = trim("`: display %-8.3f `r(sd)''")

    file write T3 "`X'" _tab "`avg'" _tab _n
    file write T3 "`X'" _tab "`spread'" _tab _n
}

file close T3
 	
* Appendix Table 2: one-way tabulation of every self-efficacy, social support
* and parental belief item, collected in a single A2.doc file.
* The first item creates the document (replace); every later item is appended.
local write_mode replace
foreach var of varlist ///
    self_efficacy_1 self_efficacy_2 self_efficacy_3 self_efficacy_4 ///
    social_support_1 social_support_2 social_support_3 social_support_4 ///
    parental_belief_1 parental_belief_2 {
    asdoc tabulate `var', `write_mode' nocf format(%9.3f) label vallab ///
        save($outdir/A2.doc) title(Appendix Table 2: Detailed Parental Measure)
    local write_mode append
}

* Main regressions: for each log LENA outcome, estimate four models with one
* parental factor each and a fifth model with all four factors together.
* Every model includes the household and child covariates and day-of-week
* dummies and uses robust standard errors. One CSV file is written per outcome.
foreach var of varlist lnAWC_PROCESSED_average lnCTC_PROCESSED_average lnCVC_PROCESSED_average {
    est clear

    * columns 1-4: one parental factor at a time
    foreach factor in sum_parental_knowledge self_efficacy_irt social_support_irt belief_lang_p5p95 {
        eststo: xi: reg `var' `factor' $hh_covariates $child_covariates i.day_of_week, robust
    }

    * column 5: all parental factors entered jointly
    eststo: xi: reg `var' self_efficacy_irt social_support_irt sum_parental_knowledge belief_lang_p5p95 ///
        $hh_covariates $child_covariates i.day_of_week, robust

    esttab using "$outdir/Table_`var'.csv", replace b(3) se(3) r2(2) ///
        star(* 0.05 ** 0.01 *** 0.001) label noconstant nonotes ///
        addnotes("$* p < 0.05$, $** p < 0.01$, $*** p < 0.001$.")
}

* S3 Table: pairwise correlations between the four parental factors,
* with significance stars, saved to S3_table.doc.
asdoc pwcorr sum_parental_knowledge self_efficacy_irt social_support_irt belief_lang_p5p95, ///
    setstars(***@.001, **@.01, *@.05) ///
    save($outdir/S3_table.doc) ///
    title(S3 Table: Correlation between the Parental Factors) replace


* S4 Table: quantile regressions for all three log LENA outcomes (AWC, CTC
* and CVC). For each outcome the joint model is estimated at the 25th, 50th
* and 75th percentile and the three columns are written to one CSV file.
* NOTE(review): confirm that qreg posts the statistic requested by r2(2);
* if it does not, that row of the exported table will be empty.
foreach var of varlist lnAWC_PROCESSED_average lnCTC_PROCESSED_average lnCVC_PROCESSED_average {
    est clear

    foreach q in 0.25 0.5 0.75 {
        eststo: xi: qreg `var' self_efficacy_irt social_support_irt sum_parental_knowledge belief_lang_p5p95 ///
            $hh_covariates $child_covariates i.day_of_week, quantile(`q')
    }

    esttab using "$outdir/S4 Table_`var'.csv", replace b(3) se(3) r2(2) ///
        star(* 0.05 ** 0.01 *** 0.001) label noconstant nonotes ///
        addnotes("$* p < 0.05$, $** p < 0.01$, $*** p < 0.001$.")
}



